import string
import re
import sys
sys.path.append('.//Tools')
import Constant
import nltk
# Characters treated as sentence terminators when ParasSplit re-inserts the
# space between a terminator and a following capital letter.
paramEnd = ['.', '?', '"', '!']
# The 26 ASCII capital letters 'A'..'Z', one single-character string each.
upperLetter = list(string.ascii_uppercase)
# Line-break characters that ParasSplit deletes from its input.
paramIgnore = ['\n', '\r']
def ParasSplit(s):
    """Split raw text into a list of sentences.

    The text is normalised before it is tokenised:

    1. every ``%`` is replaced by the word ``percent`` padded with one
       space on each side;
    2. every substring listed in ``paramIgnore`` is deleted;
    3. a single space is inserted wherever a ``paramEnd`` character is
       immediately followed by an ``upperLetter`` character, so that
       sentences glued together (``"end.Next"``) become ``"end. Next"``.

    Args:
        s: The text to split.

    Returns:
        Whatever ``nltk.sent_tokenize`` returns for the normalised text.
    """
    s = s.replace('%', ' percent ')

    # NOTE(review): the ignored characters are removed outright rather than
    # replaced by a space, so two words separated only by a line break end
    # up joined. Behaviour kept as-is; confirm this is intended.
    for ignored in paramIgnore:
        s = s.replace(ignored, '')

    # One regex pass instead of one str.replace per (terminator, capital)
    # pair. Both lists hold single characters, so character classes apply;
    # the two sets are disjoint, so matches can never overlap and the result
    # equals that of the pairwise replaces.
    if paramEnd and upperLetter:
        enders = ''.join(re.escape(ch) for ch in paramEnd)
        capitals = ''.join(re.escape(ch) for ch in upperLetter)
        s = re.sub('([' + enders + '])([' + capitals + '])', r'\1 \2', s)

    return nltk.sent_tokenize(s)
def ParaSplit(s):
    """Group the sentences of a text into fixed-size paragraphs.

    The text is first split into sentences with ``ParasSplit``; consecutive
    sentences are then grouped ``Constant.SentencesNumInPara`` at a time.

    Args:
        s: The text to split.

    Returns:
        A list of paragraphs, each a list of exactly
        ``Constant.SentencesNumInPara`` sentences in their original order.
        Trailing sentences that do not fill a complete group are dropped.
    """
    sentences = ParasSplit(s)
    size = Constant.SentencesNumInPara

    # Floor division keeps the count an exact integer (no float round trip);
    # only complete groups are emitted.
    full_groups = len(sentences) // size
    return [sentences[k * size:(k + 1) * size] for k in range(full_groups)]
        
    

#f=open('in.txt','r',encoding = 'UTF-8')
#print(ParasSplit(f.read()))

